package net.bw.realtime.jtp.dws.log.utils;

import com.huaban.analysis.jieba.JiebaSegmenter;
import org.apache.commons.io.FileUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Chinese word-segmentation helper built on the jieba segmenter.
 *
 * <p>Text is split into tokens with {@link JiebaSegmenter#sentenceProcess(String)}, each token is
 * trimmed, and tokens that appear in the stop-word list are dropped.
 *
 * <p>The stop-word list is read from the {@code stop_words.txt} classpath resource (the file kept
 * under {@code src/main/resources}), decoded as UTF-8, loaded on first use and cached for the
 * lifetime of the class.
 */
public class JiebaUtil {

    /** Classpath name of the stop-word list; every line of the file is treated as one stop word. */
    private static final String STOP_WORDS_RESOURCE = "stop_words.txt";

    /** Stop words loaded on first use; only accessed through {@link #stopWords()}. */
    private static Set<String> stopWordsCache;

    /**
     * Segments {@code content} into words and removes stop words.
     *
     * @param content the text to segment; may be {@code null} or empty
     * @return the trimmed tokens in their original order, excluding stop words; an empty mutable
     *         list when {@code content} is {@code null} or empty
     * @throws IOException if the stop-word resource is missing from the classpath or cannot be read
     */
    public static List<String> jiebaAnalyzer(String content) throws IOException {
        // Nothing to segment: avoid a NullPointerException inside the segmenter.
        if (content == null || content.isEmpty()) {
            return new ArrayList<>();
        }

        Set<String> stopWords = stopWords();

        JiebaSegmenter segmenter = new JiebaSegmenter();
        return segmenter.sentenceProcess(content).stream()
                .map(String::trim)
                .filter(token -> !stopWords.contains(token))
                .collect(Collectors.toList());
    }

    /**
     * Returns the cached stop words, loading them on the first call.
     *
     * <p>Synchronized so that concurrent first calls load the resource only once and every thread
     * sees the fully populated set.
     */
    private static synchronized Set<String> stopWords() throws IOException {
        if (stopWordsCache == null) {
            stopWordsCache = loadStopWords();
        }
        return stopWordsCache;
    }

    /** Reads every line of the stop-word classpath resource (UTF-8) into a set. */
    private static Set<String> loadStopWords() throws IOException {
        InputStream in = JiebaUtil.class.getClassLoader().getResourceAsStream(STOP_WORDS_RESOURCE);
        if (in == null) {
            throw new IOException("Stop-word resource not found on classpath: " + STOP_WORDS_RESOURCE);
        }

        Set<String> words = new HashSet<>();
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                words.add(line);
            }
        }
        return words;
    }

    /**
     * Manual smoke test: segments a sample headline and prints one token per line.
     *
     * @param args unused
     * @throws IOException if the stop-word resource cannot be loaded
     */
    public static void main(String[] args) throws IOException {

        String content="《开端》《镜双城》《淘金》三部热播剧，都有她，你发现了吗？";

        List<String> words = JiebaUtil.jiebaAnalyzer(content);
        words.forEach(System.out::println);

    }
}
